* Session setup: switch off output paging and clear any value labels in memory.
set more off
label drop _all
*cd "C:\Documents and Settings\elena\My Documents\Documents\Resume\"
* Working directory for every relative file name used in this do-file.
* The path string previously had no closing quote; it is now balanced, and the
* trailing backslash is dropped (it refers to the same directory).
cd "C:\Documents and Settings\leigha\My publications\Audit Discrimination"

/*
use combine_file1,clear
replace typeofresponse="interview" if typeofresponse=="intrview"
replace typeofresponse="negative" if typeofresponse=="nagative"
replace typeofresponse="negative" if typeofresponse=="negarive"
replace typeofresponse="interview" if typeofresponse=="call" | typeofresponse=="e-mail" | typeofresponse=="offer other possition"
replace typeofresponse="" if typeofresponse=="need apply from Angus and Robertson"
replace dateofapplication="8/31/2007" if nameofthecompany=="Huntingdale Golf Club"
gen unique_identifier=_n
save temp_combine, replace
* Export name files
keep unique_identifier nameofcontacperson nameofrespperson
for var nameofcontacperson nameofrespperson: replace X="Narelle Tognini" if X=="NarelleTognini" \ replace X="Sue" if X=="Sue and Julie"
gen forename=word(nameofcontacperson,1)
gen surname=word(nameofcontacperson,2)
replace surname="?" if surname=="" | surname=="or" | surname=="and"
replace forename="Michael" if forename=="Micheal"
outsheet unique_identifier forename surname using temp_contact.csv,replace comma 
replace forename=word(nameofrespperson,1)
replace surname=word(nameofrespperson,2)
replace surname="?" if surname=="" | surname=="or" | surname=="and"
replace forename="Michael" if forename=="Micheal"
outsheet unique_identifier forename surname using temp_response.csv,replace comma 
* At this point, I use Onomap to classify names, and save its results as temp_contact_onomap & temp_response_onomap
for any temp_contact_onomap temp_response_onomap: insheet using X.csv, clear \ sort unique_identifier \ save X, replace
use temp_combine, clear
sort unique_identifier
merge unique_identifier using temp_contact_onomap, keep(onomapgroup onomapsubgroup)
drop _merge
ren onomapgroup contact_ethnicity_group
ren onomapsubgroup contact_ethnicity_subgroup
sort unique_identifier
merge unique_identifier using temp_response_onomap, keep(onomapgroup onomapsubgroup)
drop _merge
ren onomapgroup response_ethnicity_group
ren onomapsubgroup response_ethnicity_subgroup
* Generating company ID (existing one starts to repeat after a while)
gen company_unique_id=int((_n-1)/4)
* Assigning response person ethnicity to all applicants
for any group subgroup: replace response_ethnicity_X=response_ethnicity_X[_n+1] if company_unique_id==company_unique_id[_n+1] & response_ethnicity_X=="UNCLASSIFIED" & response_ethnicity_X[_n+1]~="UNCLASSIFIED"
for any group subgroup: replace response_ethnicity_X=response_ethnicity_X[_n-1] if company_unique_id==company_unique_id[_n-1] & response_ethnicity_X=="UNCLASSIFIED" & response_ethnicity_X[_n-1]~="UNCLASSIFIED"
for any group subgroup: replace response_ethnicity_X=response_ethnicity_X[_n+1] if company_unique_id==company_unique_id[_n+1] & response_ethnicity_X=="UNCLASSIFIED" & response_ethnicity_X[_n+1]~="UNCLASSIFIED"
for any group subgroup: replace response_ethnicity_X=response_ethnicity_X[_n-1] if company_unique_id==company_unique_id[_n-1] & response_ethnicity_X=="UNCLASSIFIED" & response_ethnicity_X[_n-1]~="UNCLASSIFIED"
for any group subgroup: replace response_ethnicity_X=response_ethnicity_X[_n+1] if company_unique_id==company_unique_id[_n+1] & response_ethnicity_X=="UNCLASSIFIED" & response_ethnicity_X[_n+1]~="UNCLASSIFIED"
for any group subgroup: replace response_ethnicity_X=response_ethnicity_X[_n-1] if company_unique_id==company_unique_id[_n-1] & response_ethnicity_X=="UNCLASSIFIED" & response_ethnicity_X[_n-1]~="UNCLASSIFIED"
for any group subgroup: replace response_ethnicity_X=response_ethnicity_X[_n+1] if company_unique_id==company_unique_id[_n+1] & response_ethnicity_X=="UNCLASSIFIED" & response_ethnicity_X[_n+1]~="UNCLASSIFIED"
save new_combine, replace

*/

* Load the application-level data set written by the (commented-out) build step.
use new_combine, clear

* Sample sizes: number of non-missing companynumber values within each
* applicant type, city and job category.
foreach grp of varlist racesex city job {
	tabstat companynumber, by(`grp') stat(n)
}
* Spot check of a single cell: racesex 1, Sydney, job 2.
summarize companynumber if racesex==1 & city=="Sydney" & job==2

* Merging unemployment data
* Turn the application date string (month/day/year) into a Stata date and
* extract the calendar month used as a merge key.
gen date_application=date(dateofapplication,"MDY")
format date_application %d
gen month=month(date_application)
* State code for each of the three study cities; any other city stays empty.
gen state=cond(city=="Melbourne","vic",cond(city=="Brisbane","qld",cond(city=="Sydney","nsw","")))
* Old-style match merge on month and state. nokeep discards rows that exist
* only in unemployment_rates. The _merge variable it creates is left in place.
sort month state
merge month state using unemployment_rates, nokeep


* Outcome variable: 1 when the recorded response type is "interview", else 0.
gen interview=(typeofresponse=="interview")

* Applicant type by job cell counts, one table per city.
foreach c in Brisbane Melbourne Sydney {
	tab racesex job if city=="`c'"
}

* The same tables restricted to applications that led to an interview.
foreach c in Brisbane Melbourne Sydney {
	tab racesex job if city=="`c'" & typeofresponse=="interview"
}

* Applicant sex: odd racesex codes are female, even codes are male.
gen a_female=1
replace a_female=0 if racesex/2==int(racesex/2)

* Applicant group indicators. Each a_<group> is
*   1 for that group's pair of racesex codes,
*   0 for racesex codes 7-10 (the comparison rows),
*   missing for every other row,
* so a test by(a_<group>) compares the group with codes 7-10 only.
* Groups are listed in the order of their code pairs: 1-2, 3-4, ... 11-12.
local first 1
foreach grp in aborig chinese italian prot cath mideast {
	gen a_`grp'=0 if inrange(racesex,7,10)
	replace a_`grp'=1 if racesex==`first' | racesex==`first'+1
	local first=`first'+2
}

* Religion
* Two-sample tests of proportions on the interview rate. a_cath is 1 for the
* original racesex codes 9-10 and 0 for codes 7-8, so these compare the two
* groups labelled Protestant and Catholic below, overall and within each city.
prtest interview,by(a_cath) 
bysort city: prtest interview,by(a_cath) 

* Ethnicity
* Renumber racesex: the codes 11-12 move down to 7-8 and the former 7-10 move
* up to 9-12, matching the value labels defined on the next line.
* The a_* indicators were built from the ORIGINAL codes and are not affected.
recode racesex 11=7 12=8 7=9 8=10 9=11 10=12
label define racesex 1 "Aborig F" 2 "Aborig M" 3 "Chinese F" 4 "Chinese M" 5 "Italian F" 6 "Italian M" 7 "MidEast F" 8 "Mideast M" 9 "Protestant F" 10 "Protestant M" 11 "Catholic F" 12 "Catholic M" 
label values racesex racesex
* Matching reorder of race: 6 becomes 4, 4 becomes 5, 5 becomes 6, so that
* after this line 4 is MidEast, 5 is Protestant and 6 is Catholic.
recode race 6=4 4=5 5=6
label define race 1 "Indigenous" 2 "Chinese" 3 "Italian" 4 "MidEast" 5 "Protestant" 6 "Catholic" 
label values race race
* From here on the applicant sex indicator is called female (0 male, 1 female).
label define female 0 "Male" 1 "Female"
ren a_female female 
label values female female

*********************************
* Note: current version drops Catholics
*********************************
* Collapse Catholic & Protestant into a single "Anglo" category
* (alternative kept for reference; not used in the current version)
*recode race 6=5
* Dropping Catholics
* All rows with race==6 are removed for the rest of the do-file.
drop if race==6

* Relabel race code 5 as "Anglo". modify leaves the old label for 6 defined.
label define race 1 "Indigenous" 2 "Chinese" 3 "Italian" 4 "MidEast" 5 "Anglo", modify 
label values race race
* NOTE(review): presumably a no-op now that race==6 rows are gone (racesex
* 11-12 carry the Catholic labels); it would matter only if the commented
* "recode race 6=5" alternative were used instead - confirm.
recode racesex 11=9 12=10
label define racesex 1 "Indigenous F" 2 "Indigenous M" 3 "Chinese F" 4 "Chinese M" 5 "Italian F" 6 "Italian M" 7 "MidEast F" 8 "Mideast M" 9 "Anglo F" 10 "Anglo M", modify
label values racesex racesex

*********************************
* Tables for paper
*********************************
* Combined
* Interview rate and cell size for each applicant group.
tabstat interview, by(race) stat(mean n) format(%9.2f)

* Callback ratio = interview rate of race 5 (Anglo) / interview rate of the
* row's own group. temp2 holds the Anglo rate on Anglo rows only; because it
* is constant there, its mode spreads that single value to every row.
by race, sort: egen temp1=mean(interview)
gen temp2=temp1 if race==5
egen temp3=mode(temp2)
gen ratio=temp3/temp1
drop temp*
tabstat ratio, by(race) stat(mean) format(%9.2f)
drop ratio

* Each minority group against the comparison rows (a_<group>==0).
foreach grp in aborig italian chinese mideast {
	prtest interview, by(a_`grp')
}

* Breakdown by applicant type and by sex, with tests of the sex gap overall
* and within each job category.
tabstat interview, by(racesex) stat(mean n) format(%9.2f)
tabstat interview, by(female) stat(mean n) format(%9.2f)
prtest interview, by(female)
by job, sort: prtest interview, by(female)

* Benchmarking the results
* Expresses each minority group's interview rate as a hypothetical value of
* u_person, using a linear mapping with hard-coded constants.
*dprobit interview u_person 
*xi: dprobit interview u_person i.city 
* Probit marginal effect of u_person on interview, restricted to rows with
* a_aborig==0 (the comparison rows of the a_* indicators).
* NOTE(review): u_person is presumably the unemployment measure brought in
* from unemployment_rates - confirm.
dprobit interview u_person if a_aborig==0
sum u_person 
gen hypoth_urate=.
* For race codes 1-4: hypoth_urate = ((.35 - group interview rate)/.065) + 4.3.
* NOTE(review): .35, .065 and 4.3 look like values copied by hand from earlier
* output (a reference interview rate, the dprobit slope and the mean of
* u_person?) - TODO confirm and regenerate if the sample changes.
for num 1/4: egen temp=mean(interview) if race==X \ replace hypoth_urate=((.35-temp)/.065)+4.3 if race==X \ drop temp
tabstat hypoth_urate,by(race)
drop hypoth_urate

* Females and males separately
* Interview rate by group, first for female (1) then for male (0) applicants.
foreach sex of numlist 1 0 {
	tabstat interview if female==`sex', by(race) stat(mean n) format(%9.2f)
}
* Anglo-to-group callback ratio computed within each sex (ratio1 for females,
* ratio0 for males); each is missing on rows of the other sex.
foreach sex of numlist 1 0 {
	bysort race: egen temp1=mean(interview) if female==`sex'
	gen temp2=temp1 if race==5 & female==`sex'
	egen temp3=mode(temp2) if female==`sex'
	gen ratio`sex'=temp3/temp1 if female==`sex'
	drop temp*
}
foreach sex of numlist 1 0 {
	tabstat ratio`sex', by(race) stat(mean) format(%9.2f)
}
drop ratio*
* Group-versus-comparison tests, females first and then males.
foreach sex of numlist 1 0 {
	foreach grp in aborig chinese italian mideast {
		prtest interview if female==`sex', by(a_`grp')
	}
}
* Are racial differences significant between men and women of the same ethnicity?
* Build 0/1 group dummies (defined on every row, unlike a_<group>) and their
* interactions with female, then jointly test the interactions.
* The plain group dummies are kept because later sections reuse them.
foreach grp in aborig italian chinese mideast {
	gen `grp'=0
	replace `grp'=1 if a_`grp'==1
	gen f_`grp'=0
	replace f_`grp'=1 if female==1 & a_`grp'==1
}
dprobit interview female aborig italian chinese mideast f_*
testparm f_*
drop f_*

* By city
bysort city: tabstat interview, by(race) stat(mean n) format(%9.2f)
* Anglo-to-group callback ratio computed separately inside each city; each
* ratio_<city> variable is missing on rows from the other cities.
foreach town in Brisbane Melbourne Sydney {
	bysort race: egen temp1=mean(interview) if city=="`town'"
	gen temp2=temp1 if race==5 & city=="`town'"
	egen temp3=mode(temp2) if city=="`town'"
	gen ratio_`town'=temp3/temp1 if city=="`town'"
	drop temp*
}
foreach town in Brisbane Melbourne Sydney {
	tabstat ratio_`town', by(race) stat(mean n) format(%9.2f)
}
drop ratio*
* Group-versus-comparison tests within each city.
foreach town in Brisbane Melbourne Sydney {
	foreach grp in aborig chinese italian mideast {
		prtest interview if city=="`town'", by(a_`grp')
	}
}
* Does discrimination differ across cities?
* b_<group> and s_<group> interact the group dummies with Brisbane and Sydney.
foreach grp in aborig italian chinese mideast {
	gen b_`grp'=0
	replace b_`grp'=1 if city=="Brisbane" & a_`grp'==1
}
foreach grp in aborig italian chinese mideast {
	gen s_`grp'=0
	replace s_`grp'=1 if city=="Sydney" & a_`grp'==1
}
* Pairwise comparisons: each probit leaves one city out and jointly tests the
* interactions for one of the two remaining cities.
xi: dprobit interview aborig italian chinese mideast s_* i.city if city~="Brisbane"
testparm s_*
foreach omitted in Melbourne Sydney {
	xi: dprobit interview aborig italian chinese mideast b_* i.city if city~="`omitted'"
	testparm b_*
}
drop s_* b_*

*gen anglo=1 if race==5
*replace anglo=0 if race<=4
*bysort city: tabstat interview, by(anglo)
*bysort city: tabstat interview, by(female) stat(mean n)
* Note that "all Anglo" is the base case
*for any aborig italian chinese mideast: prtest interview,by(a_X) 
*for any aborig italian chinese mideast: prtest interview if city=="Sydney",by(a_X) 
*for any aborig italian chinese mideast: prtest interview if city=="Melbourne",by(a_X) 
*for any aborig italian chinese mideast: prtest interview if city=="Brisbane",by(a_X) 

* Type of job applied for?
* job is renamed jobtype here and keeps that name for the rest of the do-file.
ren job jobtype
label define jobtype 1 "Waitstaff" 2 "Data entry" 3 "Customer service" 4 "Sales"
label values jobtype jobtype
bysort jobtype: tabstat interview, by(race) stat(mean n) format(%9.2f)
* Anglo-to-group callback ratio inside each job type (ratio_1 ... ratio_4).
forvalues j=1/4 {
	bysort race: egen temp1=mean(interview) if jobtype==`j'
	gen temp2=temp1 if race==5 & jobtype==`j'
	egen temp3=mode(temp2) if jobtype==`j'
	gen ratio_`j'=temp3/temp1 if jobtype==`j'
	drop temp*
}
forvalues j=1/4 {
	tabstat ratio_`j', by(race) stat(mean) format(%9.2f)
}
drop ratio*
* Group-versus-comparison tests within each job type.
forvalues j=1/4 {
	foreach grp in aborig chinese italian mideast {
		prtest interview if jobtype==`j', by(a_`grp')
	}
}
* Does discrimination differ between waitstaff & data entry?
* w_<group> interacts each group dummy with the waitstaff job type; the probit
* uses only job types 1 and 2.
foreach grp in aborig italian chinese mideast {
	gen w_`grp'=0
	replace w_`grp'=1 if jobtype==1 & a_`grp'==1
}
xi: dprobit interview aborig italian chinese mideast w_* i.jobtype if jobtype<=2
testparm w_*
drop w_*

* CV quality
* Interview rate of the Anglo group (race==5) for each CV template.
tabstat interview if race==5,by(cvnumber)
* Flag the CV templates classified as high quality.
gen highqualitycv=0
for num 2 4 5 6 10 11 13 16: replace highqualitycv=1 if cvnumber==X
tabstat interview if race==5, by(highqualitycv)
* Anglo-to-group callback ratio within low (ratio_0) and high (ratio_1)
* quality CVs.
for num 0 1: bysort race: egen temp1=mean(interview) if highqualitycv==X \ gen temp2=temp1 if race==5 & highqualitycv==X \ egen temp3=mode(temp2) if highqualitycv==X \ gen ratio_X=temp3/temp1 if highqualitycv==X \ drop temp* 
bysort highqualitycv: tabstat interview, by(race) stat(mean n) format(%9.2f)
for num 0 1: tabstat ratio_X, by(race) stat(mean) format(%9.2f)
drop ratio*
for any aborig chinese italian mideast: prtest interview if highqualitycv==0, by(a_X) 
for any aborig chinese italian mideast: prtest interview if highqualitycv==1, by(a_X) 
* Does discrimination differ between high and low quality CVs?
* w_<group> interacts each group dummy with the high-quality flag.
for any aborig italian chinese mideast: gen w_X=0 \ replace w_X=1 if highqualitycv==1 & a_X==1
* FIX: this regression was a verbatim copy of the waitstaff-vs-data-entry model:
* it kept "if jobtype<=2" and left out the highqualitycv main effect, so the
* w_* interactions also absorbed the level effect of CV quality and only half
* of the job types were used. The model now includes the main effect and uses
* every job type, with job type kept as a control.
xi: dprobit interview aborig italian chinese mideast w_* highqualitycv i.jobtype
testparm w_*
drop w_*

/*
*********************************
* Graphs for powerpoint
*********************************
* Overall graph
egen temp1=mean(interview) if race==5 
egen temp2=max(temp1) 
bysort race: egen temp3=mean(interview) 
gen discrim=temp3-temp2
drop temp*
sum interview if race==5 
global mean=int(r(mean)*100) 
gr bar discrim if race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.15 .05)) ylab(-.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e))
drop discrim

* Graph by ethnicity & sex
bysort female: egen temp1=mean(interview) if race==5 
bysort female: egen temp2=max(temp1) 
bysort female race: egen temp3=mean(interview) 
gen discrim=temp3-temp2
drop temp*
for num 0: sum interview if race==5 & female==X \ global mean=int(r(mean)*100) 
for num 0 \ any "Male": gr bar discrim if female==X & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.2 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("Y") name(_X, replace) nodraw
for num 1: sum interview if race==5 & female==X \ global mean=int(r(mean)*100) 
for num 1 \ any "Female": gr bar discrim if female==X & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.2 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("Y") name(_X, replace) nodraw
gr combine _0 _1, ycommon
drop discrim

* Graph by job type
bysort jobtype: egen temp1=mean(interview) if race==5 
bysort jobtype: egen temp2=max(temp1) 
bysort jobtype race: egen temp3=mean(interview) 
gen discrim=temp3-temp2
drop temp*

for num 1: sum interview if race==5 & jobtype==X \ global mean=int(r(mean)*100) 
for num 1 \ any "Waitstaff": gr bar discrim if jobtype==X & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.25 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("Y") name(_X, replace) nodraw
for num 2: sum interview if race==5 & jobtype==X \ global mean=int(r(mean)*100) 
for num 2 \ any "Data Entry": gr bar discrim if jobtype==X & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.25 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("Y") name(_X, replace) nodraw
for num 3: sum interview if race==5 & jobtype==X \ global mean=int(r(mean)*100) 
for num 3 \ any "Customer Service": gr bar discrim if jobtype==X & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.25 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("Y") name(_X, replace) nodraw
for num 4: sum interview if race==5 & jobtype==X \ global mean=int(r(mean)*100) 
for num 4 \ any "Sales": gr bar discrim if jobtype==X & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.25 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("Y") name(_X, replace) nodraw
gr combine _1 _2 _3 _4, ycommon
*for num 1: gr hist interview if jobtype==X, over(race)
drop discrim

* Graph by city
bysort city: egen temp1=mean(interview) if race==5 
bysort city: egen temp2=max(temp1) 
bysort city race: egen temp3=mean(interview) 
gen discrim=temp3-temp2
drop temp*
for any Brisbane: sum interview if race==5 & city=="X" \ global mean=int(r(mean)*100) 
for any Brisbane: gr bar discrim if city=="X" & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.2 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("X") name(X, replace) nodraw
for any Melbourne: sum interview if race==5 & city=="X" \ global mean=int(r(mean)*100) 
for any Melbourne: gr bar discrim if city=="X" & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.2 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("X") name(X, replace) nodraw
for any Sydney: sum interview if race==5 & city=="X" \ global mean=int(r(mean)*100) 
for any Sydney: gr bar discrim if city=="X" & race<=4, over(race) blabel(bar, position(outside) format(%9.2f)) ysc(r(-.2 .05)) ylab(-.2 "-20%" -.1 "-10%" 0 "Same") yti("Difference from Anglo") text(.05 2 "Anglo=$mean%",color(red) place(e)) ti("X") name(X, replace) nodraw
gr combine Brisbane Melbourne Sydney, ycommon
*/

* Benchmark against education
* CV templates 1-16 cycle through four education levels: templates 1,5,9,13
* are level 1, templates 2,6,10,14 are level 2, and so on. Any other
* cvnumber leaves education missing.
gen education=.
forvalues lvl=1/4 {
	replace education=`lvl' if inlist(cvnumber,`lvl',`lvl'+4,`lvl'+8,`lvl'+12)
}
label define education 1 "Year 12" 2 "TAFE" 3 "Brick Uni" 4 "Sandstone Uni"
label values education education
* Linear probability model of interview on education, one job type at a time.
forvalues j=1/4 {
	xi: reg interview i.education if jobtype==`j'
}
bysort education: tabstat interview, by(race) stat(mean n) format(%9.2f)

* Regressions
* From here on the a_<group> indicators are 0 (not missing) on rows of other
* minority groups, so all four can enter one model together.
foreach ind of varlist a_aborig a_italian a_chinese a_mideast {
	replace `ind'=0 if `ind'==.
}
* Company fixed effects absorbed through company_unique_id.
xi: areg interview a_aborig a_italian a_chinese a_mideast i.cvnumber i.city, a(company_unique_id)

************************************************
* Interactions with employer characteristics
************************************************
* Gender of the contact person named in the advertisement, from the free-text
* field gender (including the misspelling "femele"). Other values stay missing.
gen contact_female=.
replace contact_female=1 if gender=="female" | gender=="Female" | gender=="femele"
replace contact_female=0 if gender=="male"
* Declare the row number as a time index so the l. (previous row) and
* f. (next row) operators can copy a known value to neighbouring rows of the
* same company. Three passes in each direction are run, as in the original
* script (the comment on company_unique_id groups rows in fours).
sort company_unique_id
gen n=_n
tsset n
forvalues pass=1/3 {
	replace contact_female=l.contact_female if company_unique_id==l.company_unique_id & contact_female==.
}
forvalues pass=1/3 {
	replace contact_female=f.contact_female if company_unique_id==f.company_unique_id & contact_female==.
}

* Gender of the person who responded to the application.
* FIX: this was coded from gender (the contact person's field), which made
* responder_female a near copy of contact_female. It is now read from
* genderofrespperson. NOTE(review): assumes genderofrespperson is a string
* field using the same spellings, including the misspelling "mall" that the
* original conditions already anticipated - confirm against the data.
gen responder_female=.
replace responder_female=1 if genderofrespperson=="female" | genderofrespperson=="Female"
replace responder_female=0 if genderofrespperson=="male" | genderofrespperson=="mall"
forvalues pass=1/3 {
	replace responder_female=l.responder_female if company_unique_id==l.company_unique_id & responder_female==.
}
forvalues pass=1/3 {
	replace responder_female=f.responder_female if company_unique_id==f.company_unique_id & responder_female==.
}

*******************************************************
* Testing gender effects - for short gender paper
*******************************************************
* Every statistic in this section excludes race code 2 (labelled Chinese).
* Combined
tabstat interview if race~=2,by(female) stat(mean n) format(%9.2f)
* ratio = female interview rate / interview rate of the row's own sex.
* temp2 holds the female rate on female rows only; its mode spreads that
* single value to every row.
bysort female: egen temp1=mean(interview) if race~=2
gen temp2=temp1 if female==1 & race~=2
egen temp3=mode(temp2)
gen ratio=temp3/temp1
drop temp* 
tabstat ratio,by(female) stat(mean) format(%9.2f)
drop ratio
* NOTE(review): this tabstat repeats the first one in this section.
tabstat interview if race~=2,by(female) stat(mean n) format(%9.2f)
* Two-sample test of the female-male difference in interview rates.
prtest interview if race~=2,by(female) 

* By job
* Same female-to-own-sex ratio, computed within each job type (ratio_1..ratio_4).
for num 1/4: bysort female: egen temp1=mean(interview) if jobtype==X & race~=2 \ gen temp2=temp1 if female==1 & jobtype==X & race~=2 \ egen temp3=mode(temp2) if jobtype==X \ gen ratio_X=temp3/temp1 if jobtype==X \ drop temp* 
for num 1/4: tabstat ratio_X, by(female) stat(mean) format(%9.2f)
drop ratio*
bysort jobtype: tabstat interview if race~=2,by(female) stat(mean n) format(%9.2f)
bysort jobtype: prtest interview if race~=2,by(female) 
* Does gender discrimination differ between data entry & sales?
* fem_job1..fem_job4 interact female with each job type. With female also in
* the model the four interactions sum to female, so they are collinear with it.
for num 1/4: gen fem_jobX=0 \ replace fem_jobX=1 if jobtype==X & female==1
xi: dprobit interview female fem_job* i.race i.city i.jobtype if race~=2 
testparm fem_job*

* By city
bysort city: tabstat interview if race~=2,by(female) stat(mean n) format(%9.2f)

* By gender of contact person
* ff = applicant female x contact person female (missing when the contact
* person's gender is unknown).
gen ff=female*contact_female
* Baseline model on the full sample (race code 2 excluded). The trailing comma
* introduces an empty option list.
xi: dprobit interview female i.race i.city i.jobtype if race~=2,
*outreg using genderdiscrim1.doc, coefastr nocons bracket 3aster replace bdec(3) se ct("Full sample") addstat("Pseudo R2",e(r2_p)) adec(2) 
* Interaction model; it is run first only to define e(sample), the rows where
* the contact gender is known, so the baseline can be re-estimated on them.
xi: dprobit interview female ff contact_female i.race i.city i.jobtype if race~=2
xi: dprobit interview female i.race i.city i.jobtype if e(sample)
*outreg using genderdiscrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Contact gender known") addstat("Pseudo R2",e(r2_p)) adec(2) 
* The interaction model is then run again so that it is the active estimation
* result for the (currently commented-out) outreg call.
xi: dprobit interview female ff contact_female i.race i.city i.jobtype if race~=2
*outreg using genderdiscrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Contact gender known") addstat("Pseudo R2",e(r2_p)) adec(2) 

* By gender of contact or response person
* Use the contact person's gender where known, otherwise the responder's.
gen contact_or_responder_fem=contact_female
replace contact_or_responder_fem=responder_female if contact_or_responder_fem==.
replace ff=female*contact_or_responder_fem
* Same three-step sequence as above, with the combined gender measure.
xi: dprobit interview female ff contact_or_responder_fem i.race i.city i.jobtype if race~=2
xi: dprobit interview female i.race i.city i.jobtype if e(sample)
*outreg using genderdiscrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Contact gender known") addstat("Pseudo R2",e(r2_p)) adec(2) 
xi: dprobit interview female ff contact_or_responder_fem i.race i.city i.jobtype if race~=2
*outreg using genderdiscrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Contact gender known") addstat("Pseudo R2",e(r2_p)) adec(2) 
* Share of female contacts/responders by job type, within the last estimation sample.
tabstat contact_female if e(sample),by(jobtype)
tabstat contact_or_responder_fem if e(sample),by(jobtype)

* Recruitment / HR firms: flag companies whose name contains any of the listed
* keywords, then interact the flag with applicant sex.
gen hr_firm=0
for any People Recruitment Recruiting HR Staffing Personnel Hired "On-Hire" "Human Resources" "Employment Agencies": replace hr_firm=1 if strmatch(nameofthecompany,"*X*")==1
gen hr_firm_fem=female*hr_firm
xi: dprobit interview female hr_firm_fem hr_firm i.race i.city i.jobtype if race~=2
*outreg using genderdiscrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("HR firm interaction") addstat("Pseudo R2",e(r2_p)) adec(2) 

* Company fixed-effects version. At this point ff is still the interaction with
* the combined contact-or-responder gender measure.
xi: areg interview female ff i.cvnumber i.city, a(company_unique_id)
* Responder gender only.
replace ff=female*responder_female
xi: dprobit interview female ff responder_female i.cvnumber i.city i.jobtype i.race
xi: areg interview female ff i.cvnumber i.city, a(company_unique_id)
* Responder gender, falling back to contact gender where the responder's is missing.
replace ff=female*contact_female if ff==.
gen cont_or_resp_female=contact_female
* FIX: the condition used the abbreviation cont_or_resp, which resolves only
* while exactly one variable has that prefix (cont_or_resp_nonanglo is created
* later in this do-file) and fails under "set varabbrev off". The full name
* is now spelled out; the result is unchanged.
replace cont_or_resp_female=responder_female if cont_or_resp_female==.
* NOTE(review): ff prefers the responder's gender while cont_or_resp_female
* prefers the contact's, so they can disagree on rows where both are known
* and differ - confirm which priority is intended.
xi: dprobit interview female ff cont_or_resp_female i.cvnumber i.city i.jobtype i.race
xi: areg interview female ff i.cvnumber i.city, a(company_unique_id)
sum contact_female responder_female

* Does the ethnic penalty vary with the gender of the recruiter?
* f_<group> = applicant group indicator x recruiter-is-female, built three ways.
* Each dprobit carries the main effect that matches the interaction. In the
* areg models a company-level main effect is absorbed by the company fixed
* effects, so none is listed.

* (1) Gender of the contact person.
for any aborig italian chinese mideast: gen f_X=a_X*contact_female
xi: areg interview a_aborig a_italian a_chinese a_mideast f_aborig f_italian f_chinese f_mideast i.cvnumber i.city, a(company_unique_id)
xi: dprobit interview a_aborig a_italian a_chinese a_mideast contact_female f_aborig f_italian f_chinese f_mideast i.cvnumber i.city i.jobtype

* (2) Gender of the responder.
* FIX: the dprobit kept contact_female as the main effect although f_* is now
* built from responder_female; it now uses responder_female.
for any aborig italian chinese mideast: replace f_X=a_X*responder_female
xi: areg interview a_aborig a_italian a_chinese a_mideast f_aborig f_italian f_chinese f_mideast i.cvnumber i.city, a(company_unique_id)
xi: dprobit interview a_aborig a_italian a_chinese a_mideast responder_female f_aborig f_italian f_chinese f_mideast i.cvnumber i.city i.jobtype

* (3) Responder gender, falling back to contact gender where it is missing.
* FIX: the main effect is now the combined measure cont_or_resp_female (created
* earlier in this do-file) instead of contact_female.
* NOTE(review): cont_or_resp_female prefers the contact's gender while f_*
* prefers the responder's; they differ only on rows where both are known and
* disagree - confirm the intended priority.
for any aborig italian chinese mideast: replace f_X=a_X*contact_female if f_X==.
xi: areg interview a_aborig a_italian a_chinese a_mideast f_aborig f_italian f_chinese f_mideast i.cvnumber i.city, a(company_unique_id)
xi: dprobit interview a_aborig a_italian a_chinese a_mideast cont_or_resp_female f_aborig f_italian f_chinese f_mideast i.cvnumber i.city i.jobtype

* Classify the contact person and the responder as Anglo (0) or non-Anglo (1)
* from the name-based ethnicity group. Groups in neither list (for example
* names left unclassified) stay missing.
* An earlier classification, kept for reference, counted NORDIC and EUROPEAN
* as Anglo (0) together with CELTIC and ENGLISH:
*   non-Anglo: AFRICAN "EAST ASIAN & PACIFIC" GREEK HISPANIC INTERNATIONAL "JEWISH AND ARMENIAN" MUSLIM SIKH "SOUTH ASIAN"
*   Anglo:     NORDIC CELTIC ENGLISH EUROPEAN
foreach who in contact response {
	gen `who'_nonanglo=.
	foreach grp in AFRICAN "EAST ASIAN & PACIFIC" EUROPEAN NORDIC GREEK HISPANIC INTERNATIONAL "JEWISH AND ARMENIAN" MUSLIM SIKH "SOUTH ASIAN" {
		replace `who'_nonanglo=1 if `who'_ethnicity_group=="`grp'"
	}
	foreach grp in CELTIC ENGLISH {
		replace `who'_nonanglo=0 if `who'_ethnicity_group=="`grp'"
	}
}
summarize contact_nonanglo response_nonanglo
tabulate contact_ethnicity_group

**** Do non-Anglos as a group behave differently?
* na_<group> = applicant group indicator x recruiter-is-non-Anglo. Three
* versions are estimated: contact person, responder, and the two combined.
* Each probit includes the matching main effect and is followed by a joint
* test of the four interactions.
* (1) Contact person.
for any aborig italian chinese mideast: gen na_X=a_X*contact_nonanglo
*xi: areg interview a_aborig a_italian a_chinese a_mideast na_* i.cvnumber i.city, a(company_unique_id)
xi: dprobit interview a_aborig a_italian a_chinese a_mideast na_* contact_nonanglo i.cvnumber i.city i.jobtype
testparm na_*
*outreg using discrim1.doc, coefastr nocons bracket 3aster replace bdec(3) se ct("Contact non-anglo") addstat("Pseudo R2",e(r2_p)) adec(2) 
* (2) Responder.
for any aborig italian chinese mideast: replace na_X=a_X*response_nonanglo
*xi: areg interview a_aborig a_italian a_chinese a_mideast na_* i.cvnumber i.city, a(company_unique_id)
xi: dprobit interview a_aborig a_italian a_chinese a_mideast na_* response_nonanglo i.cvnumber i.city i.jobtype
testparm na_*
*outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Responder non-anglo") addstat("Pseudo R2",e(r2_p)) adec(2) 
* (3) Contact person where known, otherwise responder. The interaction and the
* main effect cont_or_resp_nonanglo use the same contact-first priority.
for any aborig italian chinese mideast: replace na_X=a_X*contact_nonanglo \ replace na_X=a_X*response_nonanglo if na_X==.
gen cont_or_resp_nonanglo=contact_nonanglo
replace cont_or_resp_nonanglo=response_nonanglo if cont_or_resp_nonanglo==.
*xi: areg interview a_aborig a_italian a_chinese a_mideast na_* i.cvnumber i.city, a(company_unique_id)
xi: dprobit interview a_aborig a_italian a_chinese a_mideast na_* cont_or_resp_nonanglo i.cvnumber i.city i.jobtype
testparm na_*
*outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Contact or responder non-anglo") addstat("Pseudo R2",e(r2_p)) adec(2) 

**** Is there an effect of being same race as the applicant? (Excl Aboriginal applicants)
* c_<group> / r_<group>: 1 when the contact person / responder falls in the
* name-based ethnicity groups matched to that applicant group, 0 when their
* ethnicity is otherwise classified, missing when it is not classified.
for any italian chinese mideast: gen c_X=0 if contact_nonanglo~=. \ gen r_X=0 if response_nonanglo~=.
* Matching used: Italian applicants with EUROPEAN or GREEK recruiters, Chinese
* applicants with EAST ASIAN & PACIFIC or SOUTH ASIAN, Middle Eastern with MUSLIM.
replace c_italian=1 if contact_ethnicity_group=="EUROPEAN" | contact_ethnicity_group=="GREEK"
replace c_chinese=1 if contact_ethnicity_group=="EAST ASIAN & PACIFIC" | contact_ethnicity_group=="SOUTH ASIAN"
replace c_mideast=1 if contact_ethnicity_group=="MUSLIM"
replace r_italian=1 if response_ethnicity_group=="EUROPEAN" | response_ethnicity_group=="GREEK"
replace r_chinese=1 if response_ethnicity_group=="EAST ASIAN & PACIFIC" | response_ethnicity_group=="SOUTH ASIAN"
replace r_mideast=1 if response_ethnicity_group=="MUSLIM"
* (1) same_<group> = applicant group x contact person of the matched group.
for any italian chinese mideast: gen same_X=a_X*c_X
*xi: dprobit interview a_italian a_chinese a_mideast c_* same_* i.cvnumber i.city i.jobtype if aborig~=1
* The active model keeps Aboriginal applicants in the sample and controls for
* a_aborig; the commented alternative above excludes them instead.
xi: dprobit interview a_aborig a_italian a_chinese a_mideast c_* same_* i.cvnumber i.city i.jobtype 
testparm same_*
*outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Contact same race") addstat("Pseudo R2",e(r2_p)) adec(2) 
* (2) The same, using the responder. No testparm follows this model.
for any italian chinese mideast: replace same_X=a_X*r_X
*xi: dprobit interview a_italian a_chinese a_mideast r_* same_* i.cvnumber i.city i.jobtype if aborig~=1
xi: dprobit interview a_aborig a_italian a_chinese a_mideast r_* same_* i.cvnumber i.city i.jobtype 
*outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Responder same race") addstat("Pseudo R2",e(r2_p)) adec(2) 
* (3) Either person: start from the responder version, fill missing values from
* the contact version, and set same_<group> to 1 whenever the responder matches.
for any italian chinese mideast: replace same_X=a_X*c_X if same_X==. \ replace same_X=1 if a_X==1 & r_X==1 
* c_miss / r_miss flag rows where only one of the two people is classified;
* the missing indicator is then set to 0 so those rows stay in the model.
gen c_miss=0
gen r_miss=0
for any italian chinese mideast: replace c_miss=1 if c_X==. & r_X~=. \ replace r_miss=1 if r_X==. & c_X~=. 
for any italian chinese mideast: recode c_X .=0 if c_miss==1 \ recode r_X .=0 if r_miss==1
*xi: dprobit interview a_italian a_chinese a_mideast same_* r_* c_* i.cvnumber i.city i.jobtype if aborig~=1
* NOTE(review): the wildcards r_* and c_* also pick up r_miss and c_miss,
* which presumably is the intent (missing-data dummies) - confirm.
xi: dprobit interview a_aborig a_italian a_chinese a_mideast same_* r_* c_* i.cvnumber i.city i.jobtype 
*outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Contact or responder same race") addstat("Pseudo R2",e(r2_p)) adec(2) 
testparm same_*

* Postcode effects
* First, we match on a file that cleans up a lot of the garbled company locations
* The _merge dropped here is the one left behind by the unemployment merge.
drop _merge
sort city companylocation
merge city companylocation using match_file_garbled_companylocation, nokeep
replace companylocation=newcompanylocation if _merge==3
drop newcompanylocation _merge

* Rebuild postcode and suburb from the cleaned location string.
drop postcode suburb
* Locations that are already numeric are taken as postcodes; the codes 2001,
* 3001 and 4001 are mapped to 2000, 3000 and 4000.
gen postcode=real(companylocation)
for num 2000 3000 4000 \ any 2001 3001 4001: replace postcode=X if companylocation=="Y"
gen suburb=companylocation
* CBD, street-address and "Inner City"/"city" style locations are assigned to
* the city itself.
for any Brisbane Melbourne Sydney: replace suburb="X" if (word(suburb,1)=="CBD" | word(suburb,-1)=="St" | word(suburb,-1)=="St." | word(suburb,-1)=="Street" | suburb=="Inner City" | word(suburb,1)=="city") & city=="X"
* Spelling and abbreviation clean-up before matching suburb names.
for any Chastwood \ any Chatswood: replace suburb="Y" if suburb=="X"
replace suburb=subinstr(suburb,"Cambe","Campbe",.)
replace suburb=subinstr(suburb,"St. ","St ",.)
replace suburb=subinstr(suburb,"Mt. ","Mount ",.)
replace suburb=subinstr(suburb,"St.","St ",.)
replace suburb=subinstr(suburb,"Mt.","Mount ",.)
replace suburb=subinstr(suburb,"Mt ","Mount ",.)
replace suburb=upper(suburb)
* Look up postcodes for named suburbs; postcode1 comes from the crosswalk and
* is used only where no numeric postcode was found above.
sort suburb city
merge suburb city using postcode_suburb_xwalk, nokeep
replace postcode=postcode1 if postcode==.
drop _merge
* Attach the postcode-level characteristics.
* FIX: this merge used a hard-coded absolute path to the working directory
* and was then repeated with the relative file name. The relative name is now
* used once. The repeat added nothing: a second old-style merge without the
* update option leaves the variables already in memory unchanged.
sort postcode
merge postcode using temp_postcode_for_merge, nokeep
tab _merge
* Diagnostics: list each distinct non-empty location that found no postcode match.
sort companylocation
codebook companylocation if _merge~=3 & companylocation~=""
egen tag=tag(companylocation city) if _merge~=3 & companylocation~=""
list city companylocation if _merge~=3 & companylocation~="" & tag, sep(0)
drop _merge tag
* Leave the data sorted by postcode, as before.
sort postcode

* Save the full analysis file, then a second copy with the company name,
* person names, gender fields and notes removed.
save data_for_reanalysis, replace
drop nameofthecompany nameofcontacperson gender notes nameofrespperson genderofrespperson note var1
save "data_for_reanalysis (confidentialised).dta", replace
* NOTE(review): STOP is not a built-in Stata command, so unless a program of
* that name is defined elsewhere the do-file halts here with an error. This
* looks like a deliberate way to stop before the sections below - confirm.
STOP

* Overseas-born in postcode
* NOTE(review): os_born and the other postcode-level shares used in this
* section are presumably supplied by temp_postcode_for_merge - confirm.
* osb_<group> = applicant group indicator x overseas-born share.
for any aborig italian chinese mideast: gen osb_X=a_X*os_born
xi: areg interview a_aborig a_italian a_chinese a_mideast osb_* os_born i.cvnumber i.city, a(company_unique_id)
xi: dprobit interview a_aborig a_italian a_chinese a_mideast osb_* os_born i.cvnumber i.city i.jobtype
* Unlike the earlier outreg calls, the ones in this section are active and
* append to discrim1.doc.
outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("OS Born share") addstat("Pseudo R2",e(r2_p)) adec(2) 

* Share born in same country (by postcode)
* The same_* variables from the recruiter-ethnicity section are replaced by
* applicant group x the postcode share paired with that group.
drop same_*
for any aborig italian chinese mideast \ any indig_share italian_born chinese_born mideast_born: gen same_X=a_X*Y
xi: dprobit interview a_aborig a_italian a_chinese a_mideast same_* indig_share italian_born chinese_born mideast_born i.cvnumber i.city i.jobtype
outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Born in same country") addstat("Pseudo R2",e(r2_p)) adec(2) 
* One-by-one
* Each model uses only the rows of one applicant group (a_<group>==1).
for any aborig italian chinese mideast \ any indig_share italian_born chinese_born mideast_born: xi: dprobit interview same_X Y i.cvnumber i.city i.jobtype if a_X==1

* Share of same ancestry (by postcode)
drop same_*
for any aborig italian chinese mideast \ any indig_ancestry italian_ancestry chinese_ancestry mideast_ancestry : gen same_X=a_X*Y
xi: dprobit interview a_aborig a_italian a_chinese a_mideast same_* indig_ancestry italian_ancestry chinese_ancestry mideast_ancestry i.cvnumber i.city i.jobtype
outreg using discrim1.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Same ancestry") addstat("Pseudo R2",e(r2_p)) adec(2) 

* With postcode FE?
*xi: dprobit interview a_aborig a_italian a_chinese a_mideast same_* i.cvnumber i.city i.jobtype i.postcode
*xi: xtlogit interview a_aborig a_italian a_chinese a_mideast same_* i.cvnumber i.city i.jobtype,i(postcode) fe
* (Both postcode fixed-effects variants above are switched off.)

* Share Muslim?
* Middle Eastern applicants only.
xi: dprobit interview muslim i.cvnumber i.city i.jobtype if a_mideast==1

* Alphabetical order effects?
* aorder numbers the applications within each company after sorting on surname.
* NOTE(review): surname is not created in the active part of this do-file;
* it must already exist in the data for this line to run - confirm.
bysort company_unique_id (surname): egen aorder=seq()
xi: areg interview aorder a_aborig a_italian a_chinese a_mideast i.cvnumber i.city, a(company_unique_id)

***************************************
* Graphing our results against those from other studies
***************************************
set scheme s1mono
* NOTE(review): this absolute path sits under a different user profile
* (C:\Users\aleigh) from the working directory set at the top of the
* do-file - confirm it is still valid.
* The analysis data in memory are replaced by the summary table here.
use "C:\Users\aleigh\My publications\Audit Discrimination\summary_table.dta", clear
* Trim the three identifier columns and fill blank cells down from the row above.
for var study country year: replace X=trim(X) \ replace X=X[_n-1] if X==""
* blv marks the rows with country Australia and year 2007. It is referenced
* only by the commented-out graph fragment at the end of this section.
gen blv=1 if country=="Australia" & year=="2007"
la var ratio "Discrimination"
gen country_minority=country+"-"+minority
encode country_minority, generate(cm)
encode minority, generate(min)
* Bar labels: minority name followed by the first four characters of year.
replace minority=minority+" "+substr(year,1,4)
* The leading space makes this label differ from any other "African-American
* 2001" label, so the bar is kept separate.
replace minority=" African-American 2001" if study=="Bertrand and Mullainathan (2004)"
* Replace country names with three-letter codes.
for any Australia Netherlands Sweden France Germany UK US Canada Ireland \ any AUS NLD SWE FRA DEU GBR USA CAN IRE: replace country="Y" if country=="X"
*replace country="AUS-BLV" if country=="AUS" & year=="2007"
gen yr=real(substr(year,1,4))
* France is left out of the chart, as the chart note explains.
drop if country=="FRA"
*graph bar ratio if ratio>=1, ysc(r(1 2)) exclude0 ylab(1 1.5 2) nofill over(minority,label(angle(vertical)) sort(yr)) over(country,gap(450)) yti("Majority callback /" "minority callback")  note("Note: Chart omits Bovenkerk et al. (1979), who found a ratio of 3.47 for Antillian job applicants in France in 1976." "Two US estimates for 2001 are from different studies.") xsize(7)
* Bars are grouped by country and ordered by year within country; only rows
* with ratio of at least 1 are drawn.
graph bar ratio if ratio>=1, ysc(r(1 2)) exclude0 ylab(1 1.5 2) nofill over(minority,label(angle(vertical)) sort(yr)) over(country,gap(450)) yti("Majority callback /" "minority callback")  note("Note: Chart omits Bovenkerk et al. (1979), who found a ratio of 3.47 for Antillian job applicants in France in 1976.") xsize(7)
codebook study
sum ratio
*text(1.8 1 "*") text(1.8 3 "*") text(1.8 4 "*") text(1.8 5 "*") 
* || bar (mean) ratio if blv==1, over(minority,label(angle(vertical))) over(country)

***************************
* Creating graphs on shares across the C20th
****************************
*FOR PPT set scheme s2color
set scheme s1mono
use census_ethnic_breakdowns, clear

* Declare censusyear as the time variable and insert rows for any missing years
tsset censusyear
tsfill

* Stacked area chart: protestant is overwritten with protestant+catholic so that the
* catholic area is drawn in front of the cumulative total.
replace protestant = protestant + catholic
twoway area protestant catholic censusyear,                             ///
    xtitle("") ytitle("")                                               ///
    ylabel(0 "0%" 20 "20%" 40 "40%" 60 "60%" 80 "80%" 100 "100%")

* Legend text for the second chart
label variable indig      "Indigenous"
label variable italy      "Italian-born"
label variable china      "Chinese-born"
label variable middleeast "Middle Eastern-born"

* Running totals, built bottom-up so each series sits on top of the previous one:
* indig -> +italy -> +china -> +middleeast
replace italy      = italy + indig
replace china      = china + italy
replace middleeast = middleeast + china
twoway area middleeast china italy indig censusyear,                    ///
    xtitle("") ytitle("")                                               ///
    ylabel(0 "0%" 1 "1%" 2 "2%" 3 "3%" 4 "4%" 5 "5%" 6 "6%")


* Setting up a postcode to suburb xwalk
* Reads sla_pcode_xwalk_2001 (rows with stateno<=3 only), names the city from stateno,
* keeps suburb / postcode1 / city, and saves the result sorted by suburb and city as
* postcode_suburb_xwalk.dta in the Audit Discrimination folder.
cd "C:\Users\aleigh\Datasets\Crime Statistics\Unemployment\"
use sla_pcode_xwalk_2001 if stateno<=3, clear
* stateno 1/2/3 -> Sydney/Melbourne/Brisbane; any other value <=3 leaves city empty
gen city="Sydney" if stateno==1
replace city="Melbourne" if stateno==2
replace city="Brisbane" if stateno==3
ren locality suburb
keep suburb postcode1 city
sort suburb city
* Closing quote added: the path string was previously left unterminated
cd "C:\Users\aleigh\My publications\Audit Discrimination\"
save postcode_suburb_xwalk.dta, replace

*********************************
* Scatterplot comparing results across gender discrim studies
*********************************
use "C:\Users\aleigh\My publications\Audit Discrimination\comparison_gender_results.dta", clear

* Rows whose occupation field holds a study name get a short tag; the tag is then
* carried down to the untagged rows that follow.
generate label = "RR87" if occupation=="Riach & Rich 1987"
replace  label = "RR06" if occupation=="Riach & Rich 2006"
replace  label = "BL08" if occupation=="Booth & Leigh 2008"
replace  label = label[_n-1] if label==""

* Remove the study-name rows and the "Total" row, leaving the remaining occupation rows
drop if inlist(occupation, "Total", "Riach & Rich 1987", "Riach & Rich 2006", "Booth & Leigh 2008")
* RR87 don't have precise share data on two of their occupations
drop if inlist(occupation, "Computer operator", "Industrial relations officer")

* Rescale f_share by 1/100 before plotting
replace f_share = f_share/100

set scheme s1mono
* Labelled scatter of fm_ratio against f_share with a linear fit overlaid
twoway (scatter fm_ratio f_share, mlabel(label))                          ///
       (lfit fm_ratio f_share),                                           ///
       xtitle("Share female in occupation")                               ///
       ytitle("Female callbacks / Male callbacks")                        ///
       title("Fig 1: Gender Discrimination and Female Share")             ///
       note("Source: RR87=Riach & Rich 1987, RR06=Riach & Rich 2006, BL08=This study") ///
       legend(off)
